AI 翻譯器（多語、術語表、品質檢查）

17th鐵人賽

frankfrank8785

2025-09-18 08:59:45

112 瀏覽

分享至

🆕 新增／修改的程式碼

src/utils/lang.js（新增）
// src/utils/lang.js
/**
 * Supported language codes mapped to the display name of each language,
 * written in that language. The keys are the canonical codes used by the
 * other helpers in this module.
 */
export const LANG_MAP = {
  "zh-TW": "繁體中文",
  "zh-CN": "簡體中文",
  en: "English",
  ja: "日本語",
  ko: "한국어",
  es: "Español",
  fr: "Français",
  de: "Deutsch",
};

/**
 * Normalizes a user-supplied language code to one of the canonical keys of
 * LANG_MAP.
 *
 * Matching is tolerant of surrounding whitespace, letter case, and `_` used
 * in place of `-` (e.g. "zh_tw", "ZH-TW", " en "). A few aliases are also
 * recognized: "zh" and "zh-Hant" map to "zh-TW", "zh-Hans" maps to "zh-CN".
 *
 * @param {string} code - Language code as typed by the user.
 * @returns {string} The canonical code, or "" when the input is not a string,
 *   is blank, or does not correspond to a supported language.
 */
export function normalizeLang(code) {
  // Non-string input (e.g. `true` from a value-less CLI flag) is "unknown",
  // not a crash.
  if (typeof code !== "string") return "";
  const trimmed = code.trim();
  if (!trimmed) return "";

  // Own-property check so inherited names such as "constructor" never count
  // as supported codes.
  if (Object.hasOwn(LANG_MAP, trimmed)) return trimmed;

  const lower = trimmed.toLowerCase().replace(/_/g, "-");

  // Loose aliases.
  if (lower === "zh" || lower === "zh-hant") return "zh-TW";
  if (lower === "zh-hans") return "zh-CN";

  // Case-insensitive match against the canonical keys.
  const canonical = Object.keys(LANG_MAP).find((key) => key.toLowerCase() === lower);
  return canonical ?? "";
}

/**
 * Throws when `target` is not one of the canonical language codes in
 * LANG_MAP; returns nothing otherwise.
 *
 * @param {string} target - Canonical language code to validate.
 * @throws {Error} When the code is not an own key of LANG_MAP. The message
 *   lists every supported code.
 */
export function assertSupported(target) {
  // Own-property check: a plain truthiness test would accept inherited names
  // such as "constructor" or "toString".
  if (Object.hasOwn(LANG_MAP, target)) return;
  const list = Object.keys(LANG_MAP).join(", ");
  throw new Error(`不支援的語言代碼：${target}，請使用其中之一：${list}`);
}

src/day12_translator.js（新增）
// src/day12_translator.js
import { openai } from "./aiClient.js";
import { LANG_MAP, normalizeLang, assertSupported } from "./utils/lang.js";

/**
 * Turns a glossary into a prompt fragment that tells the model to follow the
 * given term mapping.
 *
 * Example glossary:
 *   [{ source: "Retrieval-Augmented Generation", target: "檢索增強生成" },
 *    { source: "embedding", target: "嵌入向量" }]
 *
 * @param {Array<{source:string,target:string}>} [glossary=[]] - Term pairs.
 *   Entries missing either `source` or `target` are ignored.
 * @returns {string} The multi-line hint, or "" when `glossary` is not an
 *   array or contains no usable entry.
 */
function buildGlossaryHint(glossary = []) {
  if (!Array.isArray(glossary) || glossary.length === 0) return "";
  const lines = glossary
    .filter((g) => g?.source && g?.target)
    .map((g) => `- ${g.source} => ${g.target}`);
  if (lines.length === 0) return "";
  return [
    "嚴格遵守下列術語對照，不可擅自改動：",
    lines.join("\n"),
    "若原文已是目標語言但包含上述術語，仍需以對照表標準化。",
  ].join("\n");
}

/**
 * Builds the prompt fragment asking the model to leave Markdown code (fenced
 * blocks and inline code) and link URLs untouched and translate only the
 * surrounding narrative text.
 *
 * NOTE(review): the fence and inline-code markers inside the first prompt
 * line were reconstructed after they had been stripped from the text; confirm
 * against the intended wording.
 *
 * @param {boolean} [preserveFormat=true] - When falsy, no hint is produced.
 * @returns {string} The two-line hint, or "" when `preserveFormat` is falsy.
 */
function buildFormatProtectionHint(preserveFormat = true) {
  if (!preserveFormat) return "";
  return [
    "如原文含 Markdown/程式碼區塊（``` 或 `inline`），",
    "請原樣保留程式碼與符號，僅翻譯敘述文字；連結的 URL 不要翻譯。",
  ].join("\n");
}

/**
 * Translates a single piece of text with one chat-completion request
 * (model "gpt-4o-mini", temperature 0.2).
 *
 * @param {Object} opts
 * @param {string} opts.text - Text to translate (required, must not be blank).
 * @param {string} [opts.sourceLang] - Source language code; when omitted the
 *   prompt asks the model to detect it.
 * @param {string} [opts.targetLang="zh-TW"] - Target language code
 *   (e.g. zh-TW / en / ja).
 * @param {Array<{source:string,target:string}>} [opts.glossary] - Term pairs
 *   the translation must follow.
 * @param {boolean} [opts.preserveFormat=true] - Ask the model to keep
 *   Markdown/code formatting untouched.
 * @param {("formal"|"neutral"|"casual")} [opts.tone="neutral"] - Tone; it is
 *   inserted into the system prompt as given.
 * @returns {Promise<{ source:string, target:string, detectedSourceLang?:string }>}
 *   `target` is the trimmed model reply ("" when the reply has no content).
 *   `detectedSourceLang` is the fixed placeholder "(model-detected)" when no
 *   `sourceLang` was given, otherwise undefined; the actual detected language
 *   is not reported.
 * @throws {Error} When `text` is missing, not a string, or blank, or when the
 *   target language is rejected by `assertSupported`.
 */
export async function translateOne(opts = {}) {
  const {
    text = "",
    sourceLang = "",
    targetLang = "zh-TW",
    glossary = [],
    preserveFormat = true,
    tone = "neutral",
  } = opts;

  // Reject non-strings up front so callers get the validation message rather
  // than a TypeError from `.trim()`.
  if (typeof text !== "string" || !text.trim()) throw new Error("text 為必填。");

  // Fall back to the raw value so the error from assertSupported shows what
  // the caller actually passed.
  const tgt = normalizeLang(targetLang) || targetLang;
  assertSupported(tgt);

  // Empty hint fragments are dropped by filter(Boolean).
  const sys = [
    `你是專業的翻譯員，將輸入文字翻譯為 ${LANG_MAP[tgt]}。`,
    `語氣：${tone}；除非為必要語法調整，請忠實於原文。`,
    buildFormatProtectionHint(preserveFormat),
    buildGlossaryHint(glossary),
    "若輸入已是目標語言，請僅進行用詞標準化與小幅潤飾，不可改變技術含義。",
  ].filter(Boolean).join("\n");

  const user = [
    sourceLang
      ? `來源語言：${LANG_MAP[normalizeLang(sourceLang)] || sourceLang}`
      : "來源語言：自動判斷",
    "請只回覆純文字譯文，不要加註任何說明。",
    "==== 原文開始 ====",
    text,
    "==== 原文結束 ====",
  ].join("\n");

  const res = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    temperature: 0.2,
    messages: [
      { role: "system", content: sys },
      { role: "user", content: user },
    ],
  });

  const target = res.choices?.[0]?.message?.content?.trim() || "";
  return { source: text, target, detectedSourceLang: sourceLang ? undefined : "(model-detected)" };
}

/**
 * Translates several strings by calling `translateOne` once per item, one
 * after another, and collecting the results in input order.
 *
 * @param {Object} opts
 * @param {string[]} opts.items - Strings to translate (required, non-empty).
 * @param {string} opts.targetLang
 * @param {string} [opts.sourceLang]
 * @param {Array} [opts.glossary]
 * @param {boolean} [opts.preserveFormat]
 * @param {string} [opts.tone]
 * @returns {Promise<Array<{source:string,target:string,detectedSourceLang?:string}>>}
 * @throws {Error} When `items` is not an array or is empty.
 */
export async function translateBatch(opts = {}) {
  const { items = [], ...shared } = opts;

  const hasItems = Array.isArray(items) && items.length > 0;
  if (!hasItems) {
    throw new Error("items 為必填陣列。");
  }

  // Deliberately sequential; for large batches a concurrency limiter such as
  // p-limit could be introduced instead.
  const translated = [];
  for (let i = 0; i < items.length; i += 1) {
    const entry = await translateOne({ text: items[i], ...shared });
    translated.push(entry);
  }
  return translated;
}

/**
 * Quality estimation (QE) for a translation: asks the model (model
 * "gpt-4o-mini", temperature 0.1) to score the translation and suggest
 * improvements, and returns the parsed JSON reply.
 *
 * The prompt requests three scores in the range 0~1 (adequacy, fluency,
 * terminology) plus a list of suggestions. The reply is parsed but not
 * validated, so the returned shape is whatever the model produced.
 *
 * @param {string} source - Original text.
 * @param {string} target - Translated text.
 * @param {Array} [glossary] - Term pairs added to the prompt as a hint.
 * @returns {Promise<{scores:{adequacy:number,fluency:number,terminology:number}, suggestions:string[]}>}
 * @throws {Error} When `source` or `target` is empty or blank.
 * @throws {SyntaxError} When the model reply is not valid JSON.
 */
export async function qualityCheck(source, target, glossary = []) {
  if (!source?.trim() || !target?.trim()) throw new Error("source 與 target 皆不可為空。");

  const glos = buildGlossaryHint(glossary);
  const sys = [
    "你是專業的譯後品質評估員，請針對給定的原文與譯文評分與提出具體建議。",
    "請以純 JSON 回覆，格式：",
    '{"scores":{"adequacy":0.0,"fluency":0.0,"terminology":0.0},"suggestions":["..."]}',
    "分數 0~1，小數到兩位。adequacy=忠實度；fluency=流暢度；terminology=術語一致性。",
    glos,
  ].filter(Boolean).join("\n");

  const user = [
    "=== 原文 ===",
    source,
    "=== 譯文 ===",
    target,
  ].join("\n");

  const res = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    temperature: 0.1,
    messages: [
      { role: "system", content: sys },
      { role: "user", content: user },
    ],
  });

  const raw = res.choices?.[0]?.message?.content?.trim() || "{}";
  // The model may wrap its JSON in a Markdown fence despite the instruction;
  // unwrap it if so, otherwise parse the reply as-is.
  const fenced = raw.match(/```(?:json)?\s*([\s\S]*?)```/i)?.[1];
  const json = (fenced ?? raw).trim();
  try {
    return JSON.parse(json);
  } catch (cause) {
    // Still a SyntaxError, but with the offending text attached.
    throw new SyntaxError(`品質檢查回覆不是有效的 JSON：${json.slice(0, 200)}`, { cause });
  }
}

index.js（修改：加入翻譯入口）
// index.js（節錄新增 translate 分支，其餘保留原狀）
import { translateOne, translateBatch, qualityCheck } from "./src/day12_translator.js";

// Parse `--key value` pairs from the command line into a plain object.
// A `--key` that is last, or is directly followed by another `--flag` (or by
// an empty string), is stored as boolean `true`. Tokens that do not start
// with `--` are only ever consumed as the value of the flag before them.
const args = Object.fromEntries(
  process.argv.slice(2).flatMap((token, idx, tokens) => {
    if (!token.startsWith("--")) return [];
    const next = tokens[idx + 1];
    const value = next && !next.startsWith("--") ? next : true;
    return [[token.slice(2), value]];
  })
);

/**
 * CLI entry point. Dispatches on `args.task`; this excerpt implements the
 * "translate" task with three modes selected by `args.mode`:
 *   - "one"   : translate `--text` and print the translation
 *   - "batch" : translate the `;;`-separated `--texts` and print JSON
 *   - "qe"    : quality-check `--src` against `--tgt` and print JSON
 * Any other mode prints a usage hint.
 *
 * Shared options: `--to` (default "zh-TW"), `--from` (default: auto-detect),
 * `--glossary "A:B,C:D"`, `--keepfmt false` to disable format preservation,
 * `--tone` (default "neutral").
 *
 * @returns {Promise<void>}
 */
async function main() {
  // A flag given without a value is parsed as boolean `true`. For options
  // that must be text, treat that the same as "not provided" so the default
  // applies instead of crashing later on a string method.
  const optionValue = (value, fallback) =>
    typeof value === "string" && value !== "" ? value : fallback;

  const task = args.task || "chat";

  if (task === "translate") {
    const mode = args.mode || "one"; // one | batch | qe
    const targetLang = optionValue(args.to, "zh-TW");
    const sourceLang = optionValue(args.from, ""); // empty = let the model detect
    const tone = optionValue(args.tone, "neutral");
    const preserveFormat = args.keepfmt !== "false";

    // Parse the glossary option: "A:B,C:D". Pairs missing either side are dropped.
    const glossary = optionValue(args.glossary, "")
      .split(",")
      .map((p) => p.trim())
      .filter(Boolean)
      .map((pair) => {
        const [source, target] = pair.split(":").map((s) => s.trim()).filter(Boolean);
        return source && target ? { source, target } : null;
      })
      .filter(Boolean);

    if (mode === "one") {
      const text = optionValue(args.text, "RAG enables retrieval over private knowledge bases.");
      const out = await translateOne({
        text,
        sourceLang,
        targetLang,
        glossary,
        preserveFormat,
        tone,
      });
      console.log("\n=== 單筆翻譯 ===\n");
      console.log(out.target);
    } else if (mode === "batch") {
      // Batch input: items are separated by a double semicolon ";;".
      const raw = optionValue(args.texts, "Hello;;Good morning;;This is a test.");
      const items = raw.split(";;").map((s) => s.trim()).filter(Boolean);
      const out = await translateBatch({
        items,
        sourceLang,
        targetLang,
        glossary,
        preserveFormat,
        tone,
      });
      console.log("\n=== 批次翻譯（JSON） ===\n");
      console.log(JSON.stringify(out, null, 2));
    } else if (mode === "qe") {
      const src = optionValue(args.src, "RAG enables retrieval over private knowledge bases.");
      const tgt = optionValue(args.tgt, "RAG 讓你可以在私有知識庫上進行檢索。");
      const out = await qualityCheck(src, tgt, glossary);
      console.log("\n=== 品質檢查（QE） ===\n");
      console.log(JSON.stringify(out, null, 2));
    } else {
      console.log("未知模式，請使用 --mode one | batch | qe");
    }
  } else {
    // ...your existing task branches (chat, image, vision, stt, tts, mm, docsum, etc.)
  }
}

// Run the CLI. Any rejection from main() is reported on stderr (message only)
// and the process exits with status 1.
main().catch(function reportAndExit(err) {
  console.error("發生錯誤：", err.message);
  process.exit(1);
});

package.json（新增 Scripts）
{
"scripts": {
"day12:one": "node index.js --task translate --mode one --from en --to zh-TW --text \"RAG enables retrieval over private knowledge bases.\" --glossary \"Retrieval-Augmented Generation:檢索增強生成,embedding:嵌入向量\"",
"day12:batch": "node index.js --task translate --mode batch --from en --to zh-TW --texts \"Hello;;This is a test;;Language models are useful.\"",
"day12:qe": "node index.js --task translate --mode qe --src \"Please cancel my order.\" --tgt \"請幫我取消訂單。\""
}
}

▶️ CLI 使用範例